import glob
import json
from tqdm import tqdm

# Script: scan every session JSON file in the raw-data directory, collect the
# model outputs whose action label contains "重构", and write them as one flat
# JSON list to ./rule_turn_id_reformat.json.
#
# Each input file is expected to be a JSON object with a 'dialog' list; each
# turn in that list is a dict with at least 'skip', 'turn_id', and either
# 'new_outputs' or 'model_output' (a list of dicts with 'action' and 'content').
input_files = glob.glob('/workspace/psycho_trainning/data/raw_data/action_response_data/0808/*.json')
output = []


for f in tqdm(input_files):
    # The session id is the file name with the '.json' part removed.
    session_id = f.split('/')[-1].replace('.json', '')

    # Context manager closes the handle right away instead of leaving it to
    # the garbage collector (matters when many files are processed).
    with open(f, 'r', encoding='utf-8') as fin:
        corpus = json.load(fin)

    for turn in corpus['dialog']:
        # Check the skip flag first so skipped turns do not need any of the
        # other keys to be present.
        if turn['skip'] == 1:
            continue

        turn_id = turn['turn_id']
        # Prefer 'new_outputs' when the turn has it; otherwise fall back to
        # 'model_output'. Nothing may index 'new_outputs' before this point,
        # or the fallback could never be reached.
        model_output = turn['new_outputs'] if 'new_outputs' in turn else turn['model_output']

        # Keep only outputs whose action label contains "重构"
        # (presumably a "restructuring" action -- confirm with the data owner).
        output.extend(
            {
                "session_id": session_id,
                "turn_id": turn_id,
                "action": o['action'],
                "response": o['content'],
            }
            for o in model_output
            if "重构" in o['action']
        )


# ensure_ascii=False keeps non-ASCII text readable in the output file; the
# `with` block guarantees the data is flushed and the file closed.
with open('./rule_turn_id_reformat.json', 'w', encoding='utf-8') as fout:
    json.dump(output, fout, ensure_ascii=False)